use ../processed/Census_aggr.dta, clear

gen CL7 = (CL==7)
gen CL8 = (CL==8)
gen CL9 = (CL>=9)
local iv CL7 CL8 CL9
local maxs = 18

rename bpl statefip
merge m:1 statefip using ../data/region_division_state.dta
drop if _merge==2
assert _merge==3
rename statefip bpl

recode birthyr (1910/1914=1) (1915/1919=2) (1920/1924=3) (1925/1929=4) (1930/1934=5) (1935/1939=6), gen(cohort)
forvalues j=1/6{
	gen c`j' = cohort==`j'
}
forvalues j=1/9{
	gen d`j' = division==`j'
}
local ctrl c2 c3 c4 c5 c6 d2 d3 d4 d5 d6 d7 d8 d9

local ctrlx
foreach v in `ctrl'{
	gen yschl_`v' = yschl*`v'
	local ctrlx `ctrlx' yschl_`v'
}

local xlvls
forvalues j=1/`maxs'{
	gen x`j' = (yschl>=`j')
	local xlvls `xlvls' x`j'
}

gen xc = max(yschl-12,0)
local ctrlxc
foreach v in `ctrl'{
	gen xc_`v' = xc*`v'
	local ctrlxc `ctrlxc' xc_`v'
}


local op `" i.bpl [pw=wgt], a(birthyr)"'
// FS 
eststo: areg yschl `iv' `op'
predict z, xbd

// residual of x and x
areg z `op'
predict z_res, resid
areg yschl `op'
predict x_res, resid


// predict E[Y|X,W]
areg logwage yschl `ctrlx' `op'
predict mhat1, xbd
predict vhat1, resid
gen bhat = _b[yschl]
foreach v in `ctrl'{
	replace bhat = bhat + _b[yschl_`v']*`v'
}
sum bhat [aw=wgt], d
areg z_res yschl `ctrlx' `op'
predict zhat1, xbd

areg logwage yschl `xlvls' `ctrlx' `ctrlxc' `op'
predict mhat2, xbd
predict vhat2, resid
areg z_res yschl `xlvls' `ctrlx' `ctrlxc' `op'
predict zhat2, xbd

matrix A = J(6,2,.)
matrix colnames A = "Coefficient" "StdError"
egen cs = group(bpl birthyr)

corr x_res z_res [aw=wgt], covar
scalar m_xx = r(Var_1)
scalar m_xz = r(cov_12)

// OLS
reg logwage yschl i.bpl i.birthyr [pw=wgt], vce(cluster cs)
matrix A[1,1] = _b[yschl]
matrix A[1,2] = _se[yschl]
predict e_ols, resid

// IV
ivregress 2sls logwage (yschl=`iv') i.bpl i.birthyr [pw=wgt], vce(cluster cs)
matrix A[2,1] = _b[yschl]
matrix A[2,2] = _se[yschl]
predict e_iv, resid

// IV-OLS difference 
gen temp = e_iv*z_res/m_xz - e_ols*x_res/m_xx
reg temp [pw=wgt], vce(cluster cs)
drop temp
matrix A[3,1] = A[2,1]-A[1,1]
matrix A[3,2] = _se[_cons] 

// covariate weight difference
ivregress 2sls mhat1 (yschl=`iv') i.bpl i.birthyr [pw=wgt]
scalar b_c = _b[yschl]
predict e_c, resid
gen temp = ( e_c*z_res + vhat1*zhat1 )/m_xz - e_ols*x_res/m_xx
reg temp [pw=wgt], vce(cluster cs)
drop temp
matrix A[4,1] = b_c - A[1,1]
matrix A[4,2] = _se[_cons]  

// treatment-level weight difference
ivregress 2sls mhat2 (yschl=`iv') i.bpl i.birthyr [pw=wgt]
scalar b_ct = _b[yschl]
predict e_ct, resid
gen temp = ( (e_ct*z_res + vhat2*zhat2) - (e_c*z_res + vhat1*zhat1) )/m_xz
reg temp [pw=wgt], vce(cluster cs)
drop temp
matrix A[5,1] = b_ct - b_c
matrix A[5,2] = _se[_cons]  

// marginal effect difference
gen temp = ( e_iv*z_res - (e_ct*z_res + vhat2*zhat2) )/m_xz
reg temp [pw=wgt], vce(cluster cs)
drop temp
matrix A[6,1] = A[2,1] - b_ct
matrix A[6,2] = _se[_cons]  

clear
svmat A, names(col)
gen stat = ""
replace stat = "OLS" in 1
replace stat = "IV" in 2
replace stat = "IV-OLS" in 3
replace stat = "D_CW" in 4
replace stat = "D_TW" in 5
replace stat = "D_ME" in 6
order stat, first
export delimited using ../result/decom_base.csv, replace
